{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "论文降重.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyPVTP1tLa5udIU1UbdZ0+IG",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/ChinaYiqun/ChineseAug/blob/master/%E8%AE%BA%E6%96%87%E9%99%8D%E9%87%8D.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "uj3VfViP_X0L",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 146
        },
        "outputId": "b29894f8-a8b6-41a2-f695-718acc3feb5d"
      },
      "source": [
        "# Pinned to the release recorded in this cell's saved output so a fresh\n",
        "# runtime installs the same package version the notebook was run against.\n",
        "!pip install synonyms==3.10.2"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Requirement already satisfied: synonyms in /usr/local/lib/python3.6/dist-packages (3.10.2)\n",
            "Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.6/dist-packages (from synonyms) (1.4.1)\n",
            "Requirement already satisfied: six>=1.11.0 in /usr/local/lib/python3.6/dist-packages (from synonyms) (1.12.0)\n",
            "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.6/dist-packages (from synonyms) (0.9.0)\n",
            "Requirement already satisfied: numpy>=1.13.1 in /usr/local/lib/python3.6/dist-packages (from synonyms) (1.18.4)\n",
            "Requirement already satisfied: scikit-learn>=0.19.1 in /usr/local/lib/python3.6/dist-packages (from synonyms) (0.22.2.post1)\n",
            "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.6/dist-packages (from scikit-learn>=0.19.1->synonyms) (0.15.1)\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "iTEVhWtV94M5",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 202
        },
        "outputId": "120ed5e2-bfa3-4788-85a1-37fd624541d3"
      },
      "source": [
        "import pandas as pd\n",
        "import jieba\n",
        "import synonyms\n",
        "# from google.colab import drive\n",
        "# drive.mount('/content/drive')"
      ],
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Building prefix dict from /usr/local/lib/python3.6/dist-packages/synonyms/data/vocab.txt ...\n",
            "Loading model from cache /tmp/jieba.ue7f9decb3b81a822f15f4292d4358480.cache\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            ">> Synonyms load wordseg dict [/usr/local/lib/python3.6/dist-packages/synonyms/data/vocab.txt] ... \n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "Loading model cost 1.483 seconds.\n",
            "Prefix dict has been built successfully.\n"
          ],
          "name": "stderr"
        },
        {
          "output_type": "stream",
          "text": [
            ">> Synonyms on loading stopwords [/usr/local/lib/python3.6/dist-packages/synonyms/data/stopwords.txt] ...\n",
            ">> Synonyms on loading vectors [/usr/local/lib/python3.6/dist-packages/synonyms/data/words.vector] ...\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.6/dist-packages/smart_open/smart_open_lib.py:253: UserWarning: This function is deprecated, use smart_open.open instead. See the migration notes for details: https://github.com/RaRe-Technologies/smart_open/blob/master/README.rst#migrating-to-the-new-open-function\n",
            "  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "vP27OdHLCrkG",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 54
        },
        "outputId": "0e6be734-d8f0-4715-b879-c4afcc99798d"
      },
      "source": [
        "# Change ratio: small / medium / large (s, m, l)\n",
        "# Change degree: small / medium / large (s, m, l)\n",
        "def change(text, rate=1, level=2):\n",
        "  \"\"\"Rewrite `text` by swapping its words for near-synonyms.\n",
        "\n",
        "  Each pass segments the current text with `jieba.cut` and replaces every\n",
        "  token with the entry at index 2 of the word list returned by\n",
        "  `synonyms.nearby`; tokens with fewer than three candidates are kept as-is.\n",
        "  The pieces are then joined back together and fed to the next pass.\n",
        "\n",
        "  Args:\n",
        "    text: The string to rewrite.\n",
        "    rate: Accepted for interface compatibility; the body does not use it.\n",
        "    level: Number of segment-and-replace passes; values <= 0 return `text`\n",
        "      unchanged.\n",
        "\n",
        "  Returns:\n",
        "    The rewritten string.\n",
        "  \"\"\"\n",
        "  # The pass counter has its own name. It used to share `i` with the token\n",
        "  # index below, so `level` was not honoured and very short inputs never\n",
        "  # left the loop.\n",
        "  passes_done = 0\n",
        "  while passes_done < level:\n",
        "    passes_done += 1\n",
        "    tokens = list(jieba.cut(text, cut_all=False))\n",
        "    for idx, token in enumerate(tokens):\n",
        "      try:\n",
        "        # Single lookup per token; element 0 of the result is the word list.\n",
        "        candidates = synonyms.nearby(token)[0]\n",
        "      except Exception:\n",
        "        # Best effort: a failed lookup leaves the token untouched, while\n",
        "        # KeyboardInterrupt / SystemExit still propagate.\n",
        "        continue\n",
        "      # Same pick as before (index 2), guarded by an explicit length check\n",
        "      # instead of relying on a swallowed IndexError.\n",
        "      if candidates is not None and len(candidates) > 2:\n",
        "        tokens[idx] = candidates[2]\n",
        "    text = ''.join(tokens)\n",
        "  return text\n",
        "\n",
        "# Demo: show the original sentence, then display its rewritten form.\n",
        "test = '最新、破解程序、免费游戏、互联网资讯、帮你解决关于电脑和Android的一切问题 ！'\n",
        "print(test)\n",
        "change(test, level=2)"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "最新、破解程序、免费游戏、互联网资讯、帮你解决关于电脑和Android的一切问题 ！\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'不断更新、破译流程、预约该游戏、因特网讯息、帮忙你彻底解决关于个人电脑和智能手机的一切原因 ！'"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 3
        }
      ]
    }
  ]
}